April 11, 2019
— John Swain (@swainjo) March 19, 2019
lm(), glm(), aov(), t.test(), nnet::nnet(), xgboost::xgboost(), sparklyr::ml_random_forest()
How many games would we expect to be needed to complete a best-of-seven series if each team has a 50 percent chance of winning each individual game?
How about if one team has a 60 percent chance of winning each game?
How about 70?
#' Simulate one playoff series and count the games played
#'
#' Plays games one at a time, drawing the winner of each game at random,
#' until either team has collected `wins_needed` victories.
#'
#' @param a_pr Probability that team A wins any single game; a single
#'   number between 0 and 1. Team B wins with probability `1 - a_pr`.
#' @param wins_needed Number of wins that clinches the series; a single
#'   positive whole number. The default of 4 is a best-of-seven series.
#' @return A one-row data frame with columns `a_pr` (the probability that
#'   was passed in) and `n_games` (total games played in the series).
riddler <- function(a_pr = 0.5, wins_needed = 4) {
  # Fail early with a readable message instead of an error from sample().
  stopifnot(
    is.numeric(a_pr), length(a_pr) == 1L, !is.na(a_pr),
    a_pr >= 0, a_pr <= 1,
    is.numeric(wins_needed), length(wins_needed) == 1L,
    !is.na(wins_needed), wins_needed >= 1,
    wins_needed == round(wins_needed)
  )

  a_wins <- 0
  b_wins <- 0
  while (a_wins < wins_needed && b_wins < wins_needed) {
    # Exactly one random draw per game, so a given seed yields the same
    # series as the fixed best-of-seven version did.
    game <- sample(c("A", "B"),
                   size = 1,
                   prob = c(a_pr, 1 - a_pr))
    if (game == "A") {
      a_wins <- a_wins + 1
    } else {
      b_wins <- b_wins + 1
    }
  }

  data.frame(a_pr = a_pr,
             n_games = a_wins + b_wins)
}
# Fix the seed so the single simulated series below is reproducible.
set.seed(20190411)
riddler()
##   a_pr n_games
## 1  0.5       6

# `each` repeats every element before moving on to the next one.
rep(c(0.5, 0.6, 0.7), each = 2)
## [1] 0.5 0.5 0.6 0.6 0.7 0.7
library(tidyverse)
set.seed(20190411)

# Run 10,000 simulated series for each win probability, stack the one-row
# results into a single data frame, and show the first few rows.
rep(c(0.5, 0.6, 0.7), each = 10000) %>%
  map_dfr(riddler) %>%
  head()
##   a_pr n_games
## 1  0.5       6
## 2  0.5       5
## 3  0.5       6
## 4  0.5       6
## 5  0.5       5
## 6  0.5       7
set.seed(20190411)

# Same simulation as before, reduced to the average series length and the
# number of simulated series for each win probability.
rep(c(0.5, 0.6, 0.7), each = 10000) %>%
  map_dfr(riddler) %>%
  group_by(a_pr) %>%
  summarise(
    avg_games = mean(n_games),
    n = n()
  )
## # A tibble: 3 x 3
##    a_pr avg_games     n
##   <dbl>     <dbl> <int>
## 1   0.5      5.82 10000
## 2   0.6      5.70 10000
## 3   0.7      5.38 10000
while() loops
if() logic
library(plotly)
set.seed(20190411)

# this simulation will take longer to run
# Sweep team A's win probability from 0.50 to 1.00 in steps of 0.01,
# simulate 10,000 series at each value, and chart the average length.
seq(0.50, 1.00, 0.01) %>%
  rep(each = 10000) %>%
  map_dfr(riddler) %>%
  group_by(a_pr) %>%
  summarise(
    avg_games = mean(n_games),
    n = n()
  ) %>%
  plot_ly(
    x = ~a_pr,
    y = ~avg_games,
    type = "scatter",
    mode = "lines",
    text = ~avg_games,
    hoverinfo = "x+text"
  ) %>%
  layout(
    title = "Expected Number of Games in Best-of-Seven Series\n\nGiven Varying Win Probability",
    xaxis = list(title = "Team A Win Probability"),
    yaxis = list(title = "Games", range = c(0, 7)),
    hovermode = "compare"
  )
To apply or learn more visit www.colonybrands.com
justin.marschall@imsdm.com
jcmarschall
justinmarschall
justinmarschall
data()